Basic descriptives of overall activity
# PER INFLUENCER ----
# Derive PROFILE from URL by capturing the first path segment that follows
# ".com/". gsub() leaves a URL unchanged when the pattern does not match.
# The result is stored as a data.table.
tw <- tw %>%
  mutate(PROFILE = gsub("^.*\\.com/([^/]+).*", "\\1", URL)) %>%
  as.data.table()
# Most active profiles: number of rows in `tw` per PROFILE, largest count
# first; the first 1000 rows are shown in an interactive table.
# Grouping by PROFILE already yields exactly one row per profile, so no
# extra de-duplication step is needed.
tw[, .N, by = PROFILE][order(-N)] %>%
  slice(1:1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most popular profiles: mean FOLLOWERS_COUNT per PROFILE, sorted in
# descending order; the first 1000 rows are shown in an interactive table.
tw %>%
  group_by(PROFILE) %>%
  summarise(FOLLOW = mean(FOLLOWERS_COUNT)) %>%
  arrange(desc(FOLLOW)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential profiles (by reach): total REACH per PROFILE, sorted in
# descending order; the first 1000 rows are shown in an interactive table.
tw %>%
  group_by(PROFILE) %>%
  summarise(REACH = sum(REACH)) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential profiles II (by interactions): total INTERACTIONS per
# PROFILE, sorted in descending order; the first 1000 rows are shown.
tw %>%
  group_by(PROFILE) %>%
  summarise(INTERACTIONS = sum(INTERACTIONS)) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated profiles (by favorites): total FAVORITE_COUNT per PROFILE,
# sorted in descending order; the first 1000 rows are shown.
tw %>%
  group_by(PROFILE) %>%
  summarise(FAVORITE = sum(FAVORITE_COUNT)) %>%
  arrange(desc(FAVORITE)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most retweeted profiles: total RETWEET_COUNT per PROFILE, sorted in
# descending order; the first 1000 rows are shown.
tw %>%
  group_by(PROFILE) %>%
  summarise(RETWEET = sum(RETWEET_COUNT)) %>%
  arrange(desc(RETWEET)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# PER TWEET ----
# Most popular: individual rows of `tw` ranked by FOLLOWERS_COUNT (descending),
# keeping only the profile, text, ranking column and URL; first 1000 rows shown.
tw %>%
  select(PROFILE, FULL_TEXT, FOLLOWERS_COUNT, URL) %>%
  arrange(desc(FOLLOWERS_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential (by reach): individual rows of `tw` ranked by REACH
# (descending); first 1000 rows shown.
tw %>%
  select(PROFILE, FULL_TEXT, REACH, URL) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential II (by interactions): individual rows of `tw` ranked by
# INTERACTIONS (descending); first 1000 rows shown.
tw %>%
  select(PROFILE, FULL_TEXT, INTERACTIONS, URL) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (by favorites): individual rows of `tw` ranked by
# FAVORITE_COUNT (descending); first 1000 rows shown.
tw %>%
  select(PROFILE, FULL_TEXT, FAVORITE_COUNT, URL) %>%
  arrange(desc(FAVORITE_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most retweeted: individual rows of `tw` ranked by RETWEET_COUNT
# (descending); first 1000 rows shown.
tw %>%
  select(PROFILE, FULL_TEXT, RETWEET_COUNT, URL) %>%
  arrange(desc(RETWEET_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
Check forum activity
## word sentiment brija
## 1: mladost 0.324070 NEG
## 2: matkun 0.080595 POZ
## 3: brend 0.302140 NEG
## 4: povećalo 0.458300 POZ
## 5: radnik 0.270550 POZ
## 6: pakt 0.483980 POZ
## 7: propitkivati 0.386890 POZ
## 8: stignuti 0.558510 NEG
## 9: danteov 0.344940 NEG
## 10: pripovjedan 0.394750 POZ
## 11: nepromjenjivost 0.124840 NEG
## 12: ispuštati 0.504460 NEG
## 13: elektorski 0.042709 NEG
## 14: aco 0.532640 NEG
## 15: korpus 0.518950 POZ
# Coerce `forum` to a data.table, then count the rows per TITLE (largest
# count first) and show the first 1000 titles in an interactive table.
forum <- as.data.table(forum)
forum[, .N, by = TITLE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Keep only the rows whose TITLE matches this thread exactly, then tokenise
# FULL_TEXT into the column `word` with unnest_tokens().
ZM_token <- forum[
  TITLE == "Zoran Milanović, predsjednik Republike Hrvatske vol. IV"
] %>%
  unnest_tokens(word, FULL_TEXT)
# Clean the tokens in one pass:
#   * drop every token that appears in `stop_corpus` (joined on `word`),
#   * drop missing tokens,
#   * drop tokens that contain a digit,
#   * drop tokens that consist of a single ASCII letter.
ZM_tokenTidy <- ZM_token %>%
  anti_join(stop_corpus, by = "word") %>%
  filter(
    !is.na(word),
    !grepl("\\d+", word),
    !grepl("^[a-zA-Z]$", word)
  )
# Token frequencies, most frequent first.
ZM_tokenTidy[, .N, by = word][order(-N)]
## word N
## 1: quote 3264
## 2: milanović 1643
## 3: hdz 1274
## 4: onda 838
## 5: predsjednik 811
## ---
## 38457: sišao 1
## 38458: oblacima 1
## 38459: smotre 1
## 38460: rasipanje 1
## 38461: hašomana 1
## Visualize most common words
# Count tokens, keep words occurring more than 500 times, and turn `word`
# into a factor ordered by its count so the bars appear sorted. The chart is
# flipped to horizontal bars and the word-axis title is removed.
ZM_tokenTidy[, .N, by = word
  ][N > 500
  ][order(-N)
  ][, word := reorder(word, N)] %>%
  ggplot(aes(x = word, y = N)) +
  geom_col() +
  labs(x = NULL) +
  coord_flip() +
  theme_economist()
